import re
import json
import numpy as np

from tqdm import tqdm
from pathlib import Path


# Matches the first brace-delimited span that contains no nested braces; the
# judge verdict is searched for its scores in the form of such a flat object.
_JUDGE_JSON_PATTERN = re.compile(r'\{([^{}]+)\}', re.DOTALL)

# Metric names used in the reported summary, in reporting order.
_METRIC_NAMES = ('fluency', 'relevance', 'completeness', 'proficiency')

# Keys looked up when ``language == 'zh'``, aligned index-by-index with
# _METRIC_NAMES.
_ZH_METRIC_KEYS = ('流畅性', '相关性', '完整性', '专业性')


def _extract_scores(judge_text, language, clinical_case_uid):
    """Return the four metric scores found in *judge_text*.

    Returns a tuple ordered like ``_METRIC_NAMES``, or ``None`` when the
    brace-delimited span is not valid JSON or lacks one of the expected keys
    (the error is printed).

    Raises:
        ValueError: carrying *clinical_case_uid* when *judge_text* contains no
            brace-delimited span at all.
    """
    match = _JUDGE_JSON_PATTERN.search(judge_text)
    if match is None:
        raise ValueError(clinical_case_uid)

    # Normalise the longer key variant to the short one that is looked up.
    payload = match.group().replace('医学知识专业性', '专业性')
    keys = _ZH_METRIC_KEYS if language == 'zh' else _METRIC_NAMES
    try:
        parsed = json.loads(payload)
        # Read every key before anything is recorded, so a missing key can
        # never leave the per-metric lists with different lengths.
        return tuple(parsed[key] for key in keys)
    except (json.JSONDecodeError, KeyError) as e:
        print(repr(e))
        return None


def _record_scores(scores_by_metric, judge_text, language, clinical_case_uid, error_uids):
    """Append one judgement's scores to *scores_by_metric*.

    A judgement that cannot be parsed is recorded as 0 for every metric and
    its case uid is added to *error_uids*.
    """
    scores = _extract_scores(judge_text, language, clinical_case_uid)
    if scores is None:
        error_uids.add(clinical_case_uid)
        scores = (0,) * len(_METRIC_NAMES)
    for name, score in zip(_METRIC_NAMES, scores):
        scores_by_metric[name].append(score)


def _rounded_mean(values):
    """Return the mean of *values* rounded to 2 decimals; 0.0 when empty.

    ``np.mean`` of an empty sequence is NaN (with a RuntimeWarning), which
    would end up in the saved JSON and in the leaderboard sort key.
    """
    if not values:
        return 0.0
    return round(float(np.mean(values)), 2)


def _summarise(scores_by_metric):
    """Return per-metric means plus their ``'average'`` as a dict."""
    summary = {name: _rounded_mean(scores_by_metric[name]) for name in _METRIC_NAMES}
    summary['average'] = _rounded_mean([summary[name] for name in _METRIC_NAMES])
    return summary


def calculate_metrics(score_item, dataset, language):
    """Aggregate judge scores of the selected cases into *score_item*.

    Only cases whose ``'clinical_case_uid'`` is in the module-level
    ``choose_clinical_case_uid_list`` are considered. For each of them the
    ``'judge_clinical_diagnosis'`` text is scored, and, when
    ``'imageological_examination'`` is a dict, the ``'judge_imaging_diagnosis'``
    text of every entry in it is scored as well.

    Args:
        score_item: dict that receives the results; it is mutated in place.
        dataset: sequence of case dicts with the keys described above.
        language: ``'zh'`` selects the Chinese score keys; any other value
            selects the English ones.

    Returns:
        *score_item*, with ``'clinical_diagnosis'`` and ``'imaging_diagnosis'``
        (each a dict of the four metric means and their ``'average'``) and the
        overall ``'average'`` of the two section averages. All values are
        rounded to two decimals.

    Raises:
        ValueError: carrying the case uid when a judgement text contains no
            brace-delimited span.
    """
    clinical_scores = {name: [] for name in _METRIC_NAMES}
    imaging_scores = {name: [] for name in _METRIC_NAMES}
    error_clinical_case_uid_list = set()
    # Build the lookup set once instead of scanning the list for every case.
    chosen_uids = set(choose_clinical_case_uid_list)

    for case in tqdm(dataset):
        clinical_case_uid = case['clinical_case_uid']
        if clinical_case_uid not in chosen_uids:
            continue

        _record_scores(clinical_scores, case['judge_clinical_diagnosis'], language,
                       clinical_case_uid, error_clinical_case_uid_list)

        examinations = case['imageological_examination']
        if isinstance(examinations, dict):
            for examination in examinations.values():
                _record_scores(imaging_scores, examination['judge_imaging_diagnosis'], language,
                               clinical_case_uid, error_clinical_case_uid_list)

    score_item['clinical_diagnosis'] = _summarise(clinical_scores)
    score_item['imaging_diagnosis'] = _summarise(imaging_scores)
    score_item['average'] = _rounded_mean([
        score_item['clinical_diagnosis']['average'],
        score_item['imaging_diagnosis']['average'],
    ])

    # Diagnostic output: the raw per-metric score lists, then the failed uids.
    for scores_by_metric in (clinical_scores, imaging_scores):
        for name in _METRIC_NAMES:
            print(scores_by_metric[name])

    print(error_clinical_case_uid_list)
    print(len(error_clinical_case_uid_list))

    return score_item


def main():
    """Score every configured model and write the ranked leaderboard.

    For each name in the module-level ``model_name_list`` the matching judge
    file is loaded from ``judge_dir`` and passed to ``calculate_metrics``.
    The entries are then ordered by descending ``'average'`` and dumped as
    JSON to ``score_dir / score_save_name``. Progress is printed along the way.
    """
    leaderboard = {'code': 0, 'data': []}

    for model_name in model_name_list:
        print(model_name)

        judge_path = judge_dir / f'judge_{language}_{model_name}_by_gpt4o.json'
        with judge_path.open(mode='r', encoding='utf-8') as judge_file:
            dataset = json.load(judge_file)

        # Descriptive fields first; calculate_metrics fills in the scores.
        entry = {
            'model': model_name_mapping_dict[model_name],
            'institution': institution_name_mapping_dict[model_name],
            'url': institution_url_mapping_dict[model_name],
        }
        entry = calculate_metrics(entry, dataset, language)
        print(entry)

        leaderboard['data'].append(entry)

    print(leaderboard)

    # Best overall average first.
    leaderboard['data'] = sorted(
        leaderboard['data'], key=lambda item: item['average'], reverse=True)
    print(leaderboard)

    output_path = score_dir / score_save_name
    with output_path.open(mode='w', encoding='utf-8') as output_file:
        json.dump(leaderboard, output_file, ensure_ascii=False, indent=2)

if __name__ == '__main__':
    # Run configuration. These names are read as module-level globals by
    # main() and calculate_metrics().

    # 'zh' selects the Chinese score keys; it is also part of the judge file name.
    language = 'zh'
    # NOTE(review): judge files are named '..._by_gpt4o.json' while this output
    # name says 'gpt4' — confirm the intended file name.
    score_save_name = 'score_gpt4(2024-05-27).json'

    # Models to score in this run; each must have an entry in all three
    # mapping dicts below.
    model_name_list = [
        'agent@3@1@gt',
        'agent@1@3',
        'agent@1@3@gt',
        'agent@1@1',
        'geminipro',
        'gpt4',
        'internlm2chat',
        'yichat',
    ]

    # Internal model key -> display name written to the leaderboard.
    model_name_mapping_dict = {
        'agent@1@3': 'Agent@1@3',
        'agent@1@3@gt': 'Agent@1@3-gt',
        'agent@3@1@gt': 'Agent@3@1-gt',
        'agent@1@1': 'Agent@1@1',
        'baichuan2chat': 'Baichuan2-13B-Chat',  # https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat
        'bianque2': 'BianQue-2',  # https://huggingface.co/scutcyr/BianQue-2
        'bluelmchat': 'BlueLM-7B-Chat',  # https://huggingface.co/vivo-ai/BlueLM-7B-Chat
        'chatglm3': 'ChatGLM3-6B',  # https://huggingface.co/THUDM/chatglm3-6b
        'claude3': 'Claude-3',  # https://www.anthropic.com/news/claude-3-haiku
        'discmedllm': 'DISC-MedLLM',  # https://huggingface.co/Flmc/DISC-MedLLM
        'geminipro': 'Gemini-Pro',  # https://ai.google.dev/models/gemini
        'gpt35': 'GPT-3.5',  # https://platform.openai.com/docs/models/gpt-3-5
        'gpt4': 'GPT-4',  # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
        'huatuogpt2': 'HuatuoGPT2-34B',  # https://huggingface.co/FreedomIntelligence/HuatuoGPT2-34B
        'internlm2chat': 'InternLM2-20B-Chat',  # https://huggingface.co/internlm/internlm2-chat-20b
        'pulse': 'PULSE-20B',  # https://huggingface.co/OpenMEDLab/PULSE-20bv5
        'qwenchat': 'Qwen-72B-Chat',  # https://huggingface.co/Qwen/Qwen-72B-Chat
        'spark3': 'Spark-3',  # https://xinghuo.xfyun.cn/
        'taiyillm': 'Taiyi-LLM',  # https://huggingface.co/DUTIR-BioNLP/Taiyi-LLM
        'wingpt2': 'WiNGPT2-14B-Chat',  # https://huggingface.co/winninghealth/WiNGPT2-14B-Chat
        'yichat': 'Yi-34B-Chat',  # https://huggingface.co/01-ai/Yi-34B-Chat
    }

    # Internal model key -> institution name written to the leaderboard.
    institution_name_mapping_dict = {
        'agent@1@3': 'Vaneval AI',
        'agent@1@3@gt': 'Vaneval AI',
        'agent@3@1@gt': 'Vaneval AI',
        'agent@1@1': 'Vaneval AI',
        'baichuan2chat': 'Baichuan AI',
        'bianque2': 'SCUT-FT',
        'bluelmchat': 'Vivo',
        'chatglm3': 'THUDM & Zhipu AI',
        'claude3': 'Anthropic',
        'discmedllm': 'Fudan-DISC',
        'geminipro': 'Google',
        'gpt35': 'OpenAI',
        'gpt4': 'OpenAI',
        'huatuogpt2': 'CUHK-Shenzhen',
        'internlm2chat': 'Shanghai AI Laboratory',
        'pulse': 'Shanghai AI Laboratory',
        'qwenchat': 'Alibaba Cloud',
        'spark3': 'iFLYTEK',
        'taiyillm': 'DUTIR-BioNLP',
        'wingpt2': 'Winning Health',
        'yichat': '01 AI',
    }

    # Internal model key -> URL written to the leaderboard.
    institution_url_mapping_dict = {
        'agent@1@3': 'http://www.vaneval.com/',
        'agent@1@3@gt': 'http://www.vaneval.com/',
        'agent@3@1@gt': 'http://www.vaneval.com/',
        'agent@1@1': 'http://www.vaneval.com/',
        'baichuan2chat': 'https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat',
        'bianque2': 'https://huggingface.co/scutcyr/BianQue-2',
        'bluelmchat': 'https://huggingface.co/vivo-ai/BlueLM-7B-Chat',
        'chatglm3': 'https://huggingface.co/THUDM/chatglm3-6b',
        'claude3': 'https://www.anthropic.com/news/claude-3-haiku',
        'discmedllm': 'https://huggingface.co/Flmc/DISC-MedLLM',
        'geminipro': 'https://ai.google.dev/models/gemini',
        'gpt35': 'https://platform.openai.com/docs/models/gpt-3-5',
        'gpt4': 'https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo',
        'huatuogpt2': 'https://huggingface.co/FreedomIntelligence/HuatuoGPT2-34B',
        'internlm2chat': 'https://huggingface.co/internlm/internlm2-chat-20b',
        'pulse': 'https://huggingface.co/OpenMEDLab/PULSE-20bv5',
        'qwenchat': 'https://huggingface.co/Qwen/Qwen-72B-Chat',
        'spark3': 'https://xinghuo.xfyun.cn/',
        'taiyillm': 'https://huggingface.co/DUTIR-BioNLP/Taiyi-LLM',
        'wingpt2': 'https://huggingface.co/winninghealth/WiNGPT2-14B-Chat',
        'yichat': 'https://huggingface.co/01-ai/Yi-34B-Chat',
    }

    # Only cases whose uid appears here contribute to the scores.
    choose_clinical_case_uid_list = [
        '02f44d8903084e60a0f99dcda5bcf9db', '3faeed5576ee4af48e5cc24fe4a62287',
        '6c9bb0a4cdf14527992a33650644c474', 'd6b83de7be7f420bb236e9386a957b72',
        'df10ac2333fb46c58ecc9bef01ad5b35', 'e7209a8a35e24de388bf6cbb188659e0',
        'a3fdb884378242299380a2e8e9b6ef67', '332a965d05134d2b8d98adfaa5bd1a4d',
        'df0da11d54b9415eba4ed3f7ef8f9a57', 'ad0bafc4d7f5460480e51db7aec9a420',
        'b9cd720b36df4045908cdea51e109931', 'c1cb3becfd2644d28b4942ce861163d0',
        '141667f866d3424cb4e9f08f1efba65f', '17745a2aa5d84da1986559235703dff1',
        '799af1812d42424d9b47edad171d82ba', '886347fc423b4a3989a22d1df71c3fa1',
        'b68e11c0427b4223a75efb64ec5cedf9', 'a4deec95bbd14a5aae11ac4b647e2348',
        'c49c15043f8546699ccc6c1cb1a6141e', '8e4380583c3a4cfd9f57755b67318da0',
        '5f29cccee56740f39569957148c0dc0d', '500aed0b82704977b0b3d4cfc2996d94',
        '6f5b146a331844bf8e6a2bb2cb081452', '42db4d30ed5b4ef1859e83f9dcdead82',
        '1b246bb198cd4d0bbce956342233ed55', 'd6a8347f03c54e66bea1ee2c1bf96eb7',
        '09341b4e236b45c6a2be7d1d1cbe0ac5', 'e7a082a25268417e869085c2fe87dfa6',
        '740342fb075d4c9cbd2af63e7eb17767', 'bf58697d094a4f148a6ce88c4d7510ec',
        '62034b6e268f4f189620f093c30619ae', 'ffaaab9fa35d45f59c04ff9c02ed076b',
        '31a483c38f4e4ba4b4c4924ed14cbae6', 'a7c642ca02ec434a972a1b39df9bc8d0',
        '276057f22da24aea9c0f658b66505a43', '23719caf760244b1b190d32e66a961c7',
        '8794002df418405e97f73877aba13b6d', '4a30e57e565e4863801dc6f74786fc37',
        'ae1f91894d7b45ae92abdfee832b429a', '767504eb77fa456fadec295af717e7ef',
        'ad44be7fb7864ab89385ce033561325e', 'ec925f2f2ec5438d991869032a47bc7a',
        'fefdc222d71e4e68876679b1178d113d', '117d57b4c1e64097a51dc29a94c8f0be',
        '23ef0751c9a64ad1921958dc00708ff2', '4afa8231b36a49d7a712b242f5849b24',
        '6936a98a81bf430484c297647a7c2e30', '20263fa0b7aa48978f90d5c46c674fa9',
        'c0b86381689b4f19892658ca18249881', '81a1abb6bfc7432a8b25047ca10d7ff7',
        '91dd236832764acb82ae5ff74ae01063', 'e33fd4605cec4771a750bce9eb92ff38',
        '3efd19eead9146699acd96c37bc563d1', '33ef80d49ed3491c889adab5a6f7ce6e',
        '761369fddc844032b4d0eb8c95acebee', '5e781a437f5846d6b273a9259d58e72e',
        '25a4f426a8fd4c418835765c7699a130', '94d8abab8a4643cc91c2443e96f00027',
        '1c946352b7014b40a81f815f7ff06a26', 'fe555cb9c33e4d93b2b4012516c69d0e',
        '0e887c2fdb714d9cb0c1a27d45d8b890', 'f7f3f9b5ad054f579e94e0d29c2a42a9',
        '2ef8fc2c1f5d40f8a9676c6f2d6d6f69', '3cde10f267914cefa497b1e6e939ab67',
        '4f5a67d1eb4e424892eacac92ead4285', '8074a8da21bd42eba90ec55c662e65df',
        '7cf3d52524ac4cbb970c86a6150b680d', '67ca0ff193e142c484444f3071f59e11',
        'fdb82ebb0c7d4723b9bd2c35bcb17a60', '302ea4f048df47e2a0a64b51aca6d70c',
        'e913c1388f1546e88bdd5bd4560f634e', '291c3737bbb64555a5a7251b540e0186',
        '7706a04e4494472e8717cd539c355375', '306eb172fcd34e3d90a06b0e7776b8f4',
        'ed05276db7fc410dbed5da3152e4c000', '5e4c0f0781ef49fca2a95c36ac9c05f3',
        '0cb22862a26c430eb1841d8e6b2ef66d', 'b3c9e046aa2b4ad3860858a750864a85',
        'b707c7332dc74965b0fc93bb9d72c40d', 'f5ee4a04929f46caad71938389dd160a',
        'cc9d4fafb1a749008e6db23b7d186652', 'e79cc90ece864933a6a1486c6403c0fe',
        'ded9247fef134b98a7b0d28877877ab2', 'a64dcbc4f86e4a7da25c795dfb3f1169',
        'c8a96a0e2d894b5380e03d2dba28c795', 'da31596b546249899b077a5ba91662e7',
        'fbd447f58e264cfc84df8f8a596003bb', '24107305ba5b4274961d43bcc7b21abb',
        'fef6d537653b4b30872536879624fd90', '28affa5510484ac18b502af672587bd6',
        'ba3b867e45164498b20b26bb08796c22', '21f7f573964a402e84abaca1440bb4a9',
        '8dba5a15092a460cba4f748d838e00ca', '6e02de8299614bd786e55a4412c152df',
        'e75945fc8b80457286d4c3536592949d', '0ff0f7a139fa48ee82026cff4c3bf1e0',
        '45d22746bf094e0582bf47e5891ac49a', '82239e3738e64233a6f4e0ae272290eb',
        '1b31b766acdc4c479a6d0bed8e5141e6', 'c10559da9df94e69b855bd70daf0ddcc',
        'e4bacf01d77b4e9cb08b2c0422b3b3aa', '88b86ce6f5a34edc88524ce86652f9c5',
        '9c3c3c19515e4d0594d1a883338b44a5', 'b604d357937d45f29297692da3104641',
        '08640e0de9d649ebbb0c41764391fac5', '18701ef6020a43a399c43d58abd26d65',
        '5b53edbf5e964d87ad7f0801f4c264c2', '54c1bc8338874b0caa11bf215a01363c',
        '97db0b630a304414aa40575fb3f14686', 'd11fbb10780a4948b8716c3099b23505',
        '8e8816fcc7aa4b279e6e972be3305d73', '6a723a95b6e4465eb3cb031be96460cb',
        '53d0685f335f418389ac8321d2890877', '39f1d81666404949878c3c6062c332a5',
        '0e93d178bcec44cab8a47c70abac839a', '24d914d6a7c6489e914aede55ae4e9d8',
        '228dc109e1d4479aa3e4fa6844ed8e36', '0fad21ac8a6a4d79b4d93e24e0f7af9a',
        'a7678541f00245b4ac4b02d0b46903e4', '0945713082af41248efbfdcbd6e12c2c',
        'e2c3c29e95954cbea35ab671cc5cc95d', 'dab9be7a8d654d31b02e41f97a047ce2',
        'd463eb84751e47da8a2fe518b8ec8910', '194c1fa97a274d64a94b2160597d46a5',
        '8f30cbb011f140219b9259b010992691', 'f8688ecbc40c4dfe87cb58ff7d9cd211',
        '5a7834026a96437ebd2aff980d42900e', '5f68080ef9554deba86011c8b0be58af',
        '9e0012ca5e784342986ff409a7d9c5c8', 'fc70237d28e6411bb4f249a99ba4ab71',
        'f25073ce455344c3aa4a2232c75e3748', '03a5b961b4e84662975daed75c518900',
        '31dc1f6562044ff1b5fdbacbb26e5b0e', 'd9f4d33d9ed045cba18e7304afb9e293',
        'cb24a62608bf43edba92674acbd9713a', '981d46239fec4345aeafb08991f4be00',
        '8e480098e8384b42b8951eeec5a8813c', 'c75c2f308e5d40daba172956627a7447',
        'ef32cb78e8b64378921d2d8c5d3700c4', '31a99abb8b0a4ab9ab05efd16541167f',
        '3ae0fea3de0641199a8ed81ced292451', 'bb1d7a8a86dd40e192c035e5d6bb2375',
        'b0dcc8c3a8354e26ba6888ab30612a79', 'ea4dbcc68b794729857814079d2b4104',
        '44ccfbb14c9a432b9f62803aae1c35d1', '017ff34ad9b04639a65ba22379934f5e',
        'c5e5c6595b3b491d9ac1ae93a2c6af32', 'c59476693e854ff3945fbfd37ce96767',
        '18e7eda6c8f4446c8507cd5b69ce6400', '1b01c7d72ac2421faf3324db09da7ba0'
    ]

    # Input and output directories live next to this script's parent directory.
    # mkdir(exist_ok=True) is already a no-op for an existing directory, so no
    # separate is_dir() check is needed.
    judge_dir = Path(__file__).parent.parent / Path('judges')
    judge_dir.mkdir(parents=True, exist_ok=True)
    score_dir = Path(__file__).parent.parent / Path('scores')
    score_dir.mkdir(parents=True, exist_ok=True)

    main()
